Figures on spatial transcriptome
Deconvolution on 313 clusters
# preprocessing
library(Seurat)
library(Matrix)
path <- "../HT2021-19134-4_Report20211220/"
sam_name <- c("KS20211101_1", "KS20211101_2", "KS20211102_1", "KS20211102_2")
x <- sam_name[1]
features <- read.table(gzfile(paste(path, x, "/outs/filtered_feature_bc_matrix/features.tsv.gz",
sep = "")), sep = "\t", as.is = T)
ge2an <- features[, 2]
names(ge2an) <- features[, 1]
# a function to compile matrix for each sample
read_raw <- function(x) {
raw_mx <- readMM(gzfile(paste(path, x, "/outs/filtered_feature_bc_matrix/matrix.mtx.gz",
sep = "")))
barcode <- read.table(gzfile(paste(path, x, "/outs/filtered_feature_bc_matrix/barcodes.tsv.gz",
sep = "")), sep = "\t", as.is = T)
features <- read.table(gzfile(paste(path, x, "/outs/filtered_feature_bc_matrix/features.tsv.gz",
sep = "")), sep = "\t", as.is = T)
raw_mx <- data.matrix(raw_mx)
rownames(raw_mx) <- as.character(features[, 1])
colnames(raw_mx) <- as.character(barcode[, 1])
return(raw_mx)
}
raw_mxs <- lapply(sam_name, read_raw)
mx_pos <- lapply(sam_name, function(x) {
res <- read.table(paste(path, x, "/outs/spatial/tissue_positions_list.csv",
sep = ""), sep = ",", as.is = T, row.name = 1)
return(res)
})
for (i in 1:4) print(sum(!colnames(raw_mxs[[i]]) %in% rownames(mx_pos[[i]])))
mx_pos2 <- mapply(function(x, y) {
x <- x[colnames(y), ]
res <- cbind(x[, 5], -x[, 4])
rownames(res) <- rownames(x)
return(res)
}, x = mx_pos, y = raw_mxs)
# To canonical position
mx_pos3 <- lapply(mx_pos2, function(x) {
res <- cbind(-x[, 2], x[, 1])
rownames(res) <- rownames(x)
return(res)
})
# remove spots that do not have tissues by HE images
rm_spot <- lapply(sam_name, function(x) {
res <- read.table(paste(path, x, "/outs/blank.csv", sep = ""), sep = ",",
header = T)
return(as.character(res[, 1]))
})
sapply(rm_spot, length)
if (F) {
# Deconvolution running on node LILAC
library(RCTD)
library(openxlsx)
load("../../2022/result/filter2022/clustering/hvgBysam500_mg2_noNao1_tf291_tfLcc_hvgLcc_rmEndog3/flc_raw_0421.rdata") # THIS IS RAW matrix of ALL USED CELLS!
load("../../2022/result/ficlu2022/ficlu_20220522.rdata")
load("../../2022/result/filter2022/allc185140_original_tot.rdata")
length(sc_inc <- rownames(allc_anno)[!as.character(allc_anno[, 3]) %in%
c("limb", "epithelium", "fibroblast", "miscellaneous")]) # filtering on cell types
dim(ref_mx <- flc_raw[, sc_inc])
rm(flc_raw)
ref_tt <- umi_tot[sc_inc]
ref_tp <- as.character(allc_anno[sc_inc, 1])
length(unique(ref_tp)) # 240
names(ref_tp) <- sc_inc
# common genes
sum(!rownames(ref_mx) %in% rownames(raw_mxs[[1]])) # [1] 0, all in
comg <- rownames(ref_mx)
# make RCTD object
rctd_ref <- Reference(ref_mx, as.factor(ref_tp), ref_tt)
i <- 3
# i<-4
print(paste("i:", i))
rctd_sp <- SpatialRNA(data.frame(mx_pos2[[i]]), raw_mxs[[i]][comg,
], colSums(raw_mxs[[i]]))
rctd_ob <- create.RCTD(rctd_sp, rctd_ref, max_cores = 6, test_mode = F,
CELL_MIN_INSTANCE = 5)
rctd_ob <- run.RCTD(rctd_ob, doublet_mode = "multi") # ~6 hours
rctd_res <- rctd_ob@results
save(rctd_res, file = paste("~/human/spatial/rctd/rctd_ref313_multi_res_",
sam_name[i], ".rdata", sep = ""))
print("finish!")
# nohup Rscript rctd_c313.r > rctd_c313_out.txt 2>&1 &
}
The localization of cell types. Read back result of RCTD from
LILAC.
rctd_wt <- lapply(3:4, function(i) {
load(paste("~/human/spatial/rctd/rctd_ref313_multi_res_", sam_name[i],
".rdata", sep = ""))
res <- t(sapply(rctd_res, function(x) {
res <- rep(0, length(rctd_res[[1]][[1]]))
names(res) <- names(rctd_res[[1]][[1]])
res[x$cell_type_list] <- x$sub_weights
return(res)
}))
rownames(res) <- colnames(raw_mxs[[i]])[colSums(raw_mxs[[i]]) >= 100] # RCTD filter spot by 100
res <- res[, rownames(all_anno_sumt)[rownames(all_anno_sumt) %in% colnames(res)]]
return(res)
})
source("../../scatter_fill2.r")
source("../../cross_embryo/code/plot_genes_on_tsne.r")
# get plot range to make XY not distorted
get_nano_range <- function(x) {
diff <- abs(((max(x[, 2]) - min(x[, 2])) - (max(x[, 1]) - min(x[, 1])))/2)
if ((max(x[, 2]) - min(x[, 2])) > (max(x[, 1]) - min(x[, 1])))
res <- list(c(min(x[, 1]) - diff, max(x[, 1]) + diff), range(x[,
2])) else res <- list(range(x[, 1]), c(min(x[, 2]) - diff, max(x[, 2]) +
diff))
return(res)
}
st_range <- lapply(mx_pos2, get_nano_range)
st_range3 <- lapply(mx_pos3, get_nano_range)
# proportion of each cell type
load("../../2022/result/ficlu2022/ficlu_20220522.rdata") # meta data of cell types
plot_tp <- rownames(all_anno_sumt)[all_anno_sumt[, 1] %in% colnames(rctd_wt[[1]])] # make sure older is consistent with current 'all_anno_sumt'
length(plot_tp <- setdiff(plot_tp, "epidermis-8")) # remove AER
length(plot_name <- paste(plot_tp, all_anno_sumt[plot_tp, "anno2"], sep = ": "))
plot_min <- rep(0, length(plot_tp))
plot_min[plot_tp %in% c("splanchnic LPM-13", "heart-6", "blood-5", "endoderm-6")] <- c(0.15,
0.15, 0.15, 0.3) # low boundary for some type
for (i in 3:4) {
plot_data <- t(as.matrix(rctd_wt[[i - 2]][, plot_tp]))
for (j in 1:length(plot_min)) {
if (plot_min[j] != 0)
plot_data[j, plot_data[j, ] < plot_min[j]] <- 0
}
plot_genes_on_tsne(tsne = mx_pos3[[i]][setdiff(rownames(rctd_wt[[i -
2]]), rm_spot[[i]]), ], mx = plot_data, genes = plot_tp, file_name = paste("ref313_multi_conf_tp_",
sam_name[i], sep = ""), path = paste("../result2022/rctd/", sep = ""),
plot_cex = 0.6, is_order = F, title_lab = plot_name, xx = st_range3[[i]][[1]],
yy = st_range3[[i]][[2]], main_cex = 0.8, data_min = plot_min)
}
# see Supplementary Notes 1 & 2
plot deconvolution on system level and showcase of cell types
library(openxlsx)
source("../../cross_embryo/code/plot_genes_on_tsne.r")
load("../../cross_embryo/result/reclustering3/TJ_clu/col.block.rdata")
tmp <- unique(read.table("../../type_all_col3.txt", sep = "\t", as.is = T,
comment.char = "")[, 2])
col6 <- c("lightgrey", "red", tmp[3], "purple", tmp[2], tmp[5], "blue",
"green4", "orchid", "turquoise", "sienna", tmp[9], "yellow2", "hotpink",
"navy", "steelblue", "skyblue", "pink", "black", tmp[4], rainbow(7))
rctd_pps <- lapply(rctd_wt, function(y) {
res <- apply(y, 1, function(x) {
tapply(x, all_anno_sumt[colnames(y), 3], sum)
})
res <- t(res[names(col.block)[names(col.block) %in% rownames(res)],
])
return(res)
})
library(scatterpie)
## Loading required package: ggplot2
for (i in 1:2) {
plot_mx <- data.frame(mx_pos2[[i + 2]][rownames(rctd_pps[[i]]), ],
rctd_pps[[i]])
colnames(plot_mx)[1:2] <- c("X", "Y")
plot_mx <- plot_mx[!rownames(plot_mx) %in% rm_spot[[i + 2]], ]
# pdf(paste('../result2022/rctd/ref313_syspie_',sam_name[i],'.pdf',sep=''))
ggplot() + geom_scatterpie(aes(x = X, y = Y, r = 50), data = plot_mx,
cols = colnames(plot_mx)[-(1:2)], color = NA) + coord_equal() +
scale_fill_manual(values = as.character(col.block[colnames(rctd_pps[[1]])]))
# dev.off()
}
# PA and SHF
wt <- rctd_wt[[1]]
plot_tp <- rownames(all_anno_sumt)[all_anno_sumt[, 3] == "craniofacial"][setdiff(1:16,
7:9)]
dp_tp <- all_anno_sumt[colnames(wt), "anno1"]
dp_tp[!colnames(wt) %in% plot_tp] <- "others"
plot_wt <- t(apply(wt, 1, function(x) {
tapply(x, dp_tp, sum)
}))
plot_mx <- data.frame(mx_pos2[[3]][rownames(plot_wt), ], plot_wt)
colnames(plot_mx)[1:2] <- c("X", "Y")
plot_mx <- plot_mx[!rownames(plot_mx) %in% rm_spot[[1 + 2]], ]
# pdf(paste('../result2022/rctd/pa_s3.pdf',sep=''),height=6)
ggplot() + geom_scatterpie(aes(x = X, y = Y, r = 50), data = plot_mx, cols = colnames(plot_mx)[-(1:2)],
color = NA) + coord_equal() + scale_fill_manual(values = col6[c(1,
14, 9, 2, 17, 10, 7)], labels = c(sort(unique(dp_tp))))

# dev.off()
# heart in S3
wt <- rctd_wt[[1]]
plot_tp <- c("heart-3", "heart-8", "heart-1", "heart-9", "heart-6", "heart-10",
"heart-7", "heart-2", "heart-5", "endothelium-4", "endothelium-10",
"endothelium-6")
dp_tp <- all_anno_sumt[colnames(wt), "anno1"]
dp_tp[!colnames(wt) %in% plot_tp] <- "others"
plot_wt <- t(apply(wt, 1, function(x) {
tapply(x, dp_tp, sum)
}))
plot_mx <- data.frame(mx_pos2[[3]][rownames(plot_wt), ], plot_wt)
plot_mx <- plot_mx[!rownames(plot_mx) %in% rm_spot[[1 + 2]], ]
colnames(plot_mx)[1:2] <- c("X", "Y")
lab <- c(setdiff(sort(unique(dp_tp)), "others"), "others")
# pdf(paste('../result2022/rctd/heart_s3.pdf',sep=''),height=6)
ggplot() + geom_scatterpie(aes(x = X, y = Y, r = 50), data = plot_mx, cols = colnames(plot_mx)[-(1:2)],
color = NA) + coord_equal() + scale_fill_manual(values = c(atria.cardiomyocyte = col6[2],
atrioventricular.canal = col6[3], endocardial.derived.cell = col6[4],
endocardium = col6[6], epicardial.derived.cell = col6[7], epicardium = col6[8],
sinoatrial.node..SAN. = col6[9], ventricle.cardiomyocyte = col6[10],
others = col6[1]), labels = lab)

# dev.off()
# early and late sclerotome
wt <- rctd_wt[[1]]
plot_tp <- c("somite-1", "somite-3")
dp_tp <- all_anno_sumt[colnames(wt), "anno1"]
dp_tp[!colnames(wt) %in% plot_tp] <- "others"
plot_wt <- t(apply(wt, 1, function(x) {
tapply(x, dp_tp, sum)
}))
plot_mx <- data.frame(mx_pos2[[3]][rownames(plot_wt), ], plot_wt)
colnames(plot_mx)[1:2] <- c("X", "Y")
plot_mx <- plot_mx[!rownames(plot_mx) %in% rm_spot[[1 + 2]], ]
lab <- c(setdiff(sort(unique(dp_tp)), "others"), "others")
# pdf(paste('../result2022/rctd/sclerotome_s3.pdf',sep=''),height=6)
ggplot() + geom_scatterpie(aes(x = X, y = Y, r = 50), data = plot_mx, cols = colnames(plot_mx)[-(1:2)],
color = NA) + coord_equal() + scale_fill_manual(values = c(sclerotome.early = col6[7],
sclerotome.late = col6[2], others = col6[1]), labels = lab)

# dev.off()
# brain in S4
wt <- rctd_wt[[2]]
plot_tp <- rownames(all_anno_sumt)[all_anno_sumt[, 3] == "neural progenitor"][c(1:4,
6:16)]
dp_tp <- all_anno_sumt[colnames(wt), "anno1"]
dp_tp[!colnames(wt) %in% plot_tp] <- "others"
plot_wt <- t(apply(wt, 1, function(x) {
tapply(x, dp_tp, sum)
}))
plot_mx <- data.frame(mx_pos2[[4]][rownames(plot_wt), ], plot_wt)
colnames(plot_mx)[1:2] <- c("X", "Y")
plot_mx <- plot_mx[!rownames(plot_mx) %in% rm_spot[[2 + 2]], ]
lab <- c(setdiff(sort(unique(dp_tp)), "others"), "others")
# pdf(paste('../result2022/rctd/brain_s4.pdf',sep=''),height=6)
ggplot() + geom_scatterpie(aes(x = X, y = Y, r = 50), data = plot_mx, cols = colnames(plot_mx)[-(1:2)],
color = NA) + coord_equal() + scale_fill_manual(values = c(anterior.head.fold = col6[2],
anteromedial.cerebral.pole..ACP. = col6[4], dorsal.diencephalon = col6[3],
dorsal.telencephalon = col6[6], mesencephalon = col6[7], midhindbrain.junction..MHB. = col6[8],
ventral.diencephalon.and.ZLI = col6[9], ventral.telencephalon = col6[10],
others = col6[1]), labels = lab)

# dev.off()
# all nervous system in S4
wt <- rctd_wt[[2]]
# write.table( all_anno_sumt[ all_anno_sumt[,3] %in%
# names(col.block)[1:4],],
# file='../result2022/rctd/cns_anno.txt',sep='\t', quote=F,
# col.name=F) # annotate read back annotation
cns_anno <- read.xlsx("../result2022/rctd/cns_anno2.xlsx", sheet = 1, colName = F)
rownames(cns_anno) <- cns_anno[, 1]
plot_tp <- cns_anno[cns_anno[, 6] != "rm", 1]
dp_tp <- all_anno_sumt[colnames(wt), "anno1"]
dp_tp[plot_tp] <- cns_anno[plot_tp, 6]
dp_tp[!colnames(wt) %in% plot_tp] <- "others"
plot_wt <- t(apply(wt, 1, function(x) {
tapply(x, dp_tp, sum)
}))
plot_mx <- data.frame(mx_pos2[[4]][rownames(plot_wt), ], plot_wt)
colnames(plot_mx)[1:2] <- c("X", "Y")
plot_mx <- plot_mx[!rownames(plot_mx) %in% rm_spot[[2 + 2]], ]
lab <- c(setdiff(sort(unique(dp_tp)), "others"), "others")
# pdf(paste('../result2022/rctd/cns_s4.pdf',sep=''),height=6)
ggplot() + geom_scatterpie(aes(x = X, y = Y, r = 50), data = plot_mx, cols = colnames(plot_mx)[-(1:2)],
color = NA) + coord_equal() + scale_fill_manual(values = c(anterior.head.fold = col6[5],
anteromedial.cerebral.pole..ACP. = col6[12], dorsal.diencephalon = col6[4],
dorsal.telencephalon = col6[17], mesencephalon = col6[2], midhindbrain.junction..MHB. = col6[8],
placode = col6[5], rhombomere = col6[10], sensory.neuron = col6[14],
spinal.cord = col6[6], ventral.diencephalon.and.ZLI = col6[9], ventral.telencephalon = col6[7],
others = col6[1]), labels = lab)

# dev.off()
# head mesoderm in S3
wt <- rctd_wt[[1]]
plot_tp <- rownames(all_anno_sumt)[all_anno_sumt[, 3] == "head mesoderm"][c(6,
8:11)] # 20 spots >=.05 in all undefined
# plot_tp<- c(plot_tp, c('head mesoderm-5')) # include head muscle
dp_tp <- all_anno_sumt[colnames(wt), "anno2"]
dp_tp[!colnames(wt) %in% plot_tp] <- "others"
plot_wt <- t(apply(wt, 1, function(x) {
tapply(x, dp_tp, sum)
}))
plot_mx <- data.frame(mx_pos2[[3]][rownames(plot_wt), ], plot_wt)
colnames(plot_mx)[1:2] <- c("X", "Y")
plot_mx <- plot_mx[!rownames(plot_mx) %in% rm_spot[[1 + 2]], ]
lab <- c(setdiff(sort(unique(dp_tp)), "others"), "others")
# pdf(paste('../result2022/rctd/head_mesoderm_s3.pdf',sep=''),height=6)
ggplot() + geom_scatterpie(aes(x = X, y = Y, r = 50), data = plot_mx, cols = colnames(plot_mx)[-(1:2)],
color = NA) + coord_equal() + scale_fill_manual(values = c(undefined..CYP26C1. = col6[2],
undefined.2 = col6[4], undefined.3 = col6[3], undefined.4 = col6[6],
undefined.5 = col6[7], others = col6[1]), labels = lab)

# ggplot() + geom_scatterpie(aes(x=X, y=Y, r=50), data=plot_mx,
# cols=colnames(plot_mx)[-(1:2)], color=NA) + coord_equal()+
# scale_fill_manual(values=c('head.muscle'=col6[12],'undefined..CYP26C1.'=col6[2],
# 'undefined.2'=col6[4], 'undefined.3'=col6[3], 'undefined.4'=col6[6],
# 'undefined.5'=col6[7], 'others'=col6[1] ), labels=lab ) dev.off()
# The comparison of cell number between scRNA-seq and ST in head
# mesoderm
hm_clu <- rownames(all_anno_sumt)[all_anno_sumt[, 3] == "head mesoderm"]
hmud_clu <- rownames(all_anno_sumt)[all_anno_sumt[, 3] == "head mesoderm"][c(6,
8:11)] # 20 spots >=.05 in all undefined
get_embst <- function(x) {
emb <- gsub("[A-z]", "", sapply(x, function(y) {
strsplit(y, split = "_")[[1]][1]
}))
if (emb == "7")
res <- "CS12" else if (emb %in% c("0", "21", "22"))
res <- "CS13-14" else res <- "CS15-16"
return(res)
}
allc_st <- sapply(rownames(allc_anno), get_embst)
hm_cnum <- t(sapply(hm_clu, function(x) {
c(sum(allc_anno[, 1] == x), sum(allc_anno[, 1] == x & allc_st == "CS13-14"))
}))
# calculate spots in ST
hm_snum <- mapply(function(x, y) {
# colSums(x[,hm_clu]>0.05) # count of spots with > cutoff
colSums(x[, hm_clu]) # sum of proportion
}, x = rctd_wt, y = mx_pos2[3:4])
# pdf('../result2022/rctd/head_meso_cell_spot_num_cs13_s3.pdf',
# height=5, width=5)
par(cex = 2, las = 1, mar = c(4, 4, 1, 1), lwd = 3, pch = 16)
i <- 2
j <- 1
plot(hm_cnum[, i], hm_snum[, j], main = "", xlab = "No. of cells (scRNA-seq)",
ylab = "No. of spots (ST)", frame = F)
points(hm_cnum[hmud_clu, i], hm_snum[hmud_clu, j], col = "red")

# dev.off()
# pdf('../result2022/rctd/head_meso_cell_spot_num_cs13_s4.pdf',
# height=5, width=5)
par(cex = 2, las = 1, mar = c(4, 4, 1, 1), lwd = 3, pch = 16)
i <- 2
j <- 2
plot(hm_cnum[, i], hm_snum[, j], main = "", xlab = "No. of cells (scRNA-seq)",
ylab = "No. of spots (ST)", frame = F)
points(hm_cnum[hmud_clu, i], hm_snum[hmud_clu, j], col = "red")

# dev.off()
cor.test(hm_cnum[hmud_clu, 2], hm_snum[hmud_clu, 1]) # cor=0.92, p=0.02
##
## Pearson's product-moment correlation
##
## data: hm_cnum[hmud_clu, 2] and hm_snum[hmud_clu, 1]
## t = 4.1975, df = 3, p-value = 0.02467
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2284152 0.9950972
## sample estimates:
## cor
## 0.9243948
cor.test(hm_cnum[hmud_clu, 2], hm_snum[hmud_clu, 2]) # cor=-0.29
##
## Pearson's product-moment correlation
##
## data: hm_cnum[hmud_clu, 2] and hm_snum[hmud_clu, 2]
## t = -0.53732, df = 3, p-value = 0.6283
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.9343195 0.7933670
## sample estimates:
## cor
## -0.296293
# Correlation is good between cell number and spots number at CS13
# Vascular endothelium
wt <- rctd_wt[[1]]
plot_tp <- c("endothelium-2", "endothelium-7", "endothelium-5")
dp_tp <- all_anno_sumt[colnames(wt), "anno1"]
dp_tp[!colnames(wt) %in% plot_tp] <- "others"
plot_wt <- t(apply(wt, 1, function(x) {
tapply(x, dp_tp, sum)
}))
plot_mx <- data.frame(mx_pos2[[3]][rownames(plot_wt), ], plot_wt)
colnames(plot_mx)[1:2] <- c("X", "Y")
plot_mx <- plot_mx[!rownames(plot_mx) %in% rm_spot[[1 + 2]], ]
lab <- c(setdiff(sort(unique(dp_tp)), "others"), "others")
# pdf(paste('../result2022/rctd/endothelium_s3.pdf',sep=''),height=6)
ggplot() + geom_scatterpie(aes(x = X, y = Y, r = 50), data = plot_mx, cols = colnames(plot_mx)[-(1:2)],
color = NA) + coord_equal() + scale_fill_manual(values = c(arterial.endothelium = col6[2],
vascular.endothelium = col6[7], others = col6[1]), labels = lab)

# dev.off()
Summary of identified cell types on ST
tp_rctd <- t(sapply(rownames(all_anno_sumt), function(x, cut = 0.05) {
if (!x %in% colnames(rctd_wt[[1]]))
res <- c(0, "", 0, "", "not_in") else res <- c(sum(rctd_wt[[1]][, x] >= cut), "Yes", sum(rctd_wt[[2]][,
x] >= cut), "Yes", "No")
if (as.numeric(res[1]) > 0 | as.numeric(res[3]) > 0)
res[5] <- "hit"
res <- c(all_anno_sumt[x, c(2, 5)], res)
if (as.numeric(res[3]) == 0)
res[4] <- "-"
if (as.numeric(res[5]) == 0)
res[6] <- "-"
return(res)
}))
colnames(tp_rctd) <- c("Cluster ID", "Annotation", "Number of spots in slice 1 at CS13 with % > 0.05",
"Reasonable position in slice 1 at CS13?", "Number of spots in slice 2 at CS13 with % > 0.05",
"Reasonable position in slice 2 at CS13?", "Hit?")
# write.table( tp_rctd, file='../result2022/rctd/tp_rctd_250.txt',
# sep='\t', quote=F) # Manual determine whether the position is
# reasonable
tp_rctd <- read.xlsx("../result2022/rctd/tp_rctd_250.xlsx", sheet = 1,
rowNames = F)
# reset hit by considering spatial information
tp_rctd_hit <- (tp_rctd[, 3] > 0 & tp_rctd[, 4] == "Yes") | (tp_rctd[,
5] > 0 & tp_rctd[, 6] == "Yes")
tp_rctd[tp_rctd[, 7] == "hit" & (!tp_rctd_hit), 7] <- "No"
rownames(tp_rctd) <- rownames(all_anno_sumt)
sys_rctd_hit <- t(sapply(names(col.block), function(x) {
clu <- rownames(all_anno_sumt)[all_anno_sumt[, 3] %in% x]
stat <- tp_rctd[clu, ]
tt <- nrow(stat)
if (x %in% c("limb", "epithelium", "fibroblast", "miscellaneous"))
res <- c(tt, 0, 0, 0, 0) else {
s3 <- sum(stat[, 3] > 0 & stat[, 4] == "Yes")
s4 <- sum(stat[, 5] > 0 & stat[, 6] == "Yes")
uni <- sum(stat[, 7] == "hit")
oth <- tt - uni
both <- s3 + s4 - uni
res <- c(0, oth, s3 - both, s4 - both, both)
not_in <- sum(stat[, 7] %in% "not_in")
}
return(res)
}))
plot_den <- rep(-1, 5)
plot_den[1] <- 10
# pdf('../result2022/rctd/sys_rctd_hit.pdf',width=10, height=5)
par(cex = 2, las = 1, mar = c(3, 4, 1, 1), lwd = 3)
bar <- barplot(t(sys_rctd_hit), ylab = "Number of cell types", main = "",
border = NA, col = col6[c(1, 5, 10, 3, 2)], names.arg = rep("", 19),
density = plot_den, angle = 45)
text(bar + 1, -3, label = rownames(sys_rctd_hit), cex = 0.5, srt = 45,
xpd = NA, pos = 2)
legend("topright", legend = c("ST section 1 and 2", "ST section 2 only",
"ST section 1 only", "not captured", "not in deconvolution"), fill = rev(col6[c(1,
5, 10, 3, 2)]), density = rev(plot_den), border = NA, cex = 0.4)

# dev.off()
c(sum(sys_rctd_hit[, -(1:2)]), sum(sys_rctd_hit[, -1])) # [1] 223 240, 93%
## [1] 223 240
Confident call of RCTD
i <- 3
load(paste("~/human/spatial/rctd/rctd_ref313_multi_res_", sam_name[i],
".rdata", sep = ""))
length(rctd_res)
## [1] 2909
sum(sapply(rctd_res, function(x) {
Reduce("|", x$conf_list)
}))/length(rctd_res) # 0.958, 2787 2909
## [1] 0.9580612
rctd_conf <- sapply(rctd_res, function(x) {
Reduce("|", x$conf_list)
})
names(rctd_conf) <- rownames(rctd_wt[[1]])
plot_ind <- !names(rctd_conf) %in% rm_spot[[3]]
# pdf(paste('../result2022/rctd/conf_spot_',sam_name[i],'.pdf',sep=''))
par(cex = 2, las = 1, mar = c(1, 1, 1, 1), lwd = 3, pch = 16)
plot(mx_pos2[[i]][names(rctd_conf)[plot_ind], 1], mx_pos2[[i]][names(rctd_conf)[plot_ind],
2], main = "", frame = F, xlab = "", ylab = "", cex = 0.3, col = ifelse(rctd_conf[plot_ind],
"black", col6[2]), xaxt = "n", yaxt = "n") # Spots with confident call

# dev.off()
i <- 4
load(paste("~/human/spatial/rctd/rctd_ref313_multi_res_", sam_name[i],
".rdata", sep = ""))
length(rctd_res)
## [1] 3144
sum(sapply(rctd_res, function(x) {
Reduce("|", x$conf_list)
}))/length(rctd_res) # 0.952, [1] 2994 3144
## [1] 0.9522901
(2787 + 2994)/(2909 + 3144) # [1] 0.9550636, total confident calls
## [1] 0.9550636
rctd_conf <- sapply(rctd_res, function(x) {
Reduce("|", x$conf_list)
})
names(rctd_conf) <- rownames(rctd_wt[[2]])
plot_ind <- !names(rctd_conf) %in% rm_spot[[4]]
# pdf(paste('../result2022/rctd/conf_spot_',sam_name[i],'.pdf',sep=''))
par(cex = 2, las = 1, mar = c(1, 1, 1, 1), lwd = 3, pch = 16)
plot(mx_pos2[[i]][names(rctd_conf)[plot_ind], 1], mx_pos2[[i]][names(rctd_conf)[plot_ind],
2], main = "", frame = F, xlab = "", ylab = "", cex = 0.3, col = ifelse(rctd_conf[plot_ind],
"black", col6[2]), xaxt = "n", yaxt = "n")

# dev.off()
signaling interaction by coexpression on ST
# 1) L-R pair is expressed in two cell types in scRNA-seq; 2)
# co-expressed in spots with two cell types / all spots with two cell
# types, background: co-expressed in all spots / all spots
tct <- lapply(rctd_wt, function(x, cut = 0.2) {
# filter out cell type pairs (both proportion>=.2 in >=5 spots)
pair <- apply(cbind(rownames(x), x), 1, function(y) {
spot <- y[1]
y <- as.numeric(y[-1]) > cut
res <- matrix("", nr = 3, nc = 1)
if (sum(y) >= 2) {
comb <- combn(colnames(x)[y], 2)
comb <- rbind(rep(spot, ncol(comb)), comb)
res <- cbind(res, comb)
}
return(res)
})
pair <- t(do.call(cbind, pair))
pair <- pair[pair[, 1] != "", ]
return(pair)
})
sapply(tct, function(x) {
sum(table(apply(x[, -1], 1, paste, collapse = "_")) >= 5)
}) #
tct_ps <- lapply(tct, function(x, cut = 5) {
info <- table(apply(x[, -1], 1, paste, collapse = "_"))
res <- names(info)[info >= cut]
res <- t(sapply(res, function(y) {
strsplit(y, split = "_")[[1]]
}))
return(res)
})
# remove pairs involved in blood/endothelium, a pair of neuron/prog in
# neural tube
length(sig_rm1 <- rownames(all_anno_sumt)[all_anno_sumt[, 3] %in% c("blood",
"endothelium")])
sig_rm2 <- rownames(all_anno_sumt)[c(18:30, 34:44, 64:89)] # if a pair are both
sapply(tct_ps <- lapply(tct_ps, function(x) {
x <- x[!(x[, 1] %in% sig_rm1 | x[, 2] %in% sig_rm1), ]
x <- x[!(x[, 1] %in% sig_rm2 & x[, 2] %in% sig_rm2), ]
return(x)
}), dim)
# test each pair of LR in each pair of cell types with > N overlapped
# spots
load("../../2022/result/ficlu2022/c313_mn_fr.rdata")
sum(!rownames(c313_mn) %in% rownames(raw_mxs[[1]])) # [1] 0, all in
comg <- rownames(c313_mn)
get_id <- function(x) {
sapply(x, function(y) {
names(ge2an)[ge2an == y][1]
})
}
# test with new LR database from
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7261168/
dim(sigp <- read.table("../../signaling/Cabello-Aguilar2020/LRdb_enID_by_ge2an.txt",
sep = "\t", as.is = T, header = T)) # 3190 2
dim(sigp <- sigp[sigp[, 1] %in% comg & sigp[, 2] %in% comg, ]) # [1] 3184 2
dim(sigp <- cbind(as.character(sigp[, 1]), as.character(sigp[, 2]))) # to character
# 'sigp': 1st column is ligand and 2nd column is receptor
# a function to test by Fisher's exact test: NEED to distinguish which
# cell type express ligands and which express receptors
test_tct_pair <- function(pair_can, tct_can, mx) {
tct_sigp <- apply(pair_can, 1, function(x, cut_exp = 0.5) {
spot <- tct_can[tct_can[, 2] == x[1] & tct_can[, 3] == x[2], 1]
ct <- c(x[1], x[2])
is_exp <- apply(sigp, 1, function(y) {
(c313_mn[y[1], ct[1]] >= cut_exp & c313_mn[y[2], ct[2]] >=
cut_exp) | (c313_mn[y[2], ct[1]] >= cut_exp & c313_mn[y[1],
ct[2]] >= cut_exp)
})
exp <- sigp[is_exp, ]
if (length(exp) == 0)
return(rep("", ))
# which expressed ligands
which_lig <- apply(sigp[is_exp, ], 1, function(y) {
if (c313_mn[y[2], ct[1]] < cut_exp)
lig <- 1 # if ct1 does not express receptor, according to the filter above, ct2 is the type that expresses receptor
else if (c313_mn[y[2], ct[2]] < cut_exp)
lig <- 2 else if (c313_mn[y[1], ct[1]] > c313_mn[y[1], ct[2]])
lig <- 1 # if both types express receptor, the type with higher expression of ligands is the sender.
else lig <- 2
return(lig)
})
dup <- 0 # deal with only 1 line of result
if (length(exp) == 2) {
exp <- rbind(exp, exp)
dup <- 1
}
res <- t(apply(exp, 1, function(y) {
stat <- c(sum(apply(mx[y, spot] >= cut_exp, 2, sum) == 2),
length(spot), sum(apply(mx[y, ] >= cut_exp, 2, sum) ==
2), ncol(mx))
fisher <- fisher.test(matrix(stat, nr = 2))
stat <- c(stat, fisher$p.value, fisher$estimate)
return(stat)
}))
rownames(res) <- apply(exp, 1, function(y) {
paste(ge2an[y], collapse = "_")
})
if (dup == 1) {
res <- t(data.matrix(res[1, ]))
rownames(res) <- apply(exp, 1, function(y) {
paste(ge2an[y], collapse = "_")
})[1]
}
res <- cbind(res, which_lig)
return(res)
})
tct_sigp <- do.call("rbind", mapply(function(x, y) {
ct <- strsplit(x, split = "_")[[1]]
res <- cbind(rep(ct[1], nrow(y)), rep(ct[2], nrow(y)), rownames(y),
y)
return(res)
}, x = names(tct_sigp), y = tct_sigp))
tct_sigp <- cbind(tct_sigp, p.adjust(as.numeric(tct_sigp[, 8]), "BH"))
rownames(tct_sigp) <- rep("", nrow(tct_sigp))
tct_sigp <- tct_sigp[order(as.numeric(tct_sigp[, 8])), ]
tct_sigp <- cbind(tct_sigp, all_anno_sumt[tct_sigp[, 1], "anno2"],
all_anno_sumt[tct_sigp[, 2], "anno2"])
return(tct_sigp)
}
norm_mxs <- lapply(raw_mxs, function(x) {
total <- colSums(x)
res <- t(t(x)/total * 10000)
return(res)
})
s3_tct_sigp <- test_tct_pair(tct_ps[[1]], tct[[1]], mx = norm_mxs[[3]])
s4_tct_sigp <- test_tct_pair(tct_ps[[2]], tct[[2]], mx = norm_mxs[[4]])
# move the 'which ligands column' to the end
s3_tct_sigp <- cbind(s3_tct_sigp[, -10], s3_tct_sigp[, 10])
s4_tct_sigp <- cbind(s4_tct_sigp[, -10], s4_tct_sigp[, 10])
# heatmap on top hit LR pairs (merge results from two slices)
compile_tct_mx <- function(tct_ls, cut = 0.05) {
lr <- lapply(tct_ls, function(x) {
x[as.numeric(x[, 10]) < cut & as.numeric(x[, 9]) > 1, 3]
})
lr <- unique(unlist(lr))
ct <- lapply(tct_ls, function(x) {
apply(x[as.numeric(x[, 10]) < cut & as.numeric(x[, 9]) > 1, 1:2],
1, paste, collapse = "_")
})
ct <- unique(unlist(ct))
ct <- t(sapply(ct, function(x) {
strsplit(x, split = "_")[[1]]
}))
mxs <- lapply(tct_ls, function(x) {
qv <- matrix(1, nr = nrow(ct), nc = length(lr))
rownames(qv) <- apply(ct, 1, paste, collapse = "_")
colnames(qv) <- lr
for (i in 1:nrow(qv)) {
for (j in 1:ncol(qv)) {
if (sum(x[, 1] == ct[i, 1] & x[, 2] == ct[i, 2] & x[, 3] ==
lr[j]) > 0) {
val <- x[x[, 1] == ct[i, 1] & x[, 2] == ct[i, 2] & x[,
3] == lr[j], 10]
qv[i, j] <- val
}
}
}
return(qv)
})
res <- matrix(apply(sapply(mxs, function(x) {
as.numeric(as.vector(x))
}), 1, min), nr = nrow(mxs[[1]]))
rownames(res) <- rownames(mxs[[1]])
colnames(res) <- colnames(mxs[[1]])
return(res)
}
dim(tct_sigp <- compile_tct_mx(tct_ls = list(s3_tct_sigp, s4_tct_sigp),
cut = 0.01))
# remove LRs involved in bad pairs
bad_pr <- c("heart-6_endoderm-6", "IM-2_endoderm-6", "brain-25_neural progenitor-8",
"neural progenitor-7_neural progenitor-8", "brain-25_neural progenitor-28",
"somite-2_endoderm-4", "heart-1_endoderm-6", "somatic LPM-10_somatic LPM-13")
bad_gs <- colnames(tct_sigp)[sapply(colnames(tct_sigp), function(x) {
sum(!rownames(tct_sigp)[tct_sigp[, x] < 0.01] %in% bad_pr)
}) == 0]
tct_sigp <- tct_sigp[!rownames(tct_sigp) %in% bad_pr, !colnames(tct_sigp) %in%
bad_gs]
Summary statistics and heatmap
dim(tct_sigp)
## [1] 46 89
sum(tct_sigp < 0.01) # 134
## [1] 134
sum(tct_sigp < 0.01)/nrow(tct_sigp) # [1] ~3 LR per cell type pairs
## [1] 2.913043
source("../../heatmap3.r")
source("../../heatmap3_invalid.r")
plot_mx <- -log10(tct_sigp)
plot_mx[plot_mx > 4] <- 4
plot_lab1 <- sapply(rownames(tct_sigp), function(x) {
paste(all_anno_sumt[strsplit(x, split = "_")[[1]][1], c(3, 5)], collapse = "_")
})
plot_lab2 <- sapply(rownames(tct_sigp), function(x) {
paste(all_anno_sumt[strsplit(x, split = "_")[[1]][2], c(3, 5)], collapse = "_")
})
# pdf(paste('../result2022/signal/tct_sigp_heatmap_bprop02.pdf',sep=''),
# width=13,height=7) # this is bprop>0.2
tmp <- heatmap3(plot_mx, labRow = paste(plot_lab1, plot_lab2, sep = "....."),
labRow2 = NA, scale = "none", dendrogram = "none", trace = "none",
Rowv = T, Colv = T, symkey = F, density.info = "none", keysize = 1,
col = colorRampPalette(c("white", "red"))(499), color_key_label = "-log10 qv",
color_key_label_cex = 1, margins = c(0, 0), color_key_axis_cex = 1,
key_mar = c(3, 0, 1, 1), labRow_pos = c(2, 4), sepwidth = c(0.1, 0.1),
sepcolor = "black", cexRow = 0.7, cexCol = 0.4, labCol_pos = 3, labCol_las = 2)

# dev.off()
tct_sigp_out <- cbind(t(sapply(rownames(tct_sigp), function(x) {
res <- strsplit(x, split = "_")[[1]]
return(c(all_anno_sumt[res[1], c(3, 5)], all_anno_sumt[res[2], c(3,
5)]))
})), tct_sigp)
colnames(tct_sigp_out)[1:4] <- c("system of type 1", "annotation of type 1",
"system of type 2", "annotation of type 2")
# write.table( tct_sigp_out,
# file='../result2022/signal/tct_sigp_mx_bprop025.txt', quote=F,
# sep='\t') Go with cutoff 0.2. Get 5 out of 7 signaling centers in
# neuroectoderm except two roof plates.
Plot signaling interaction involved in floor plate
fp_tp <- rownames(all_anno_sumt)[grep("floor plate", all_anno_sumt[, 4])][3] # only floor plate in spinal cord
fp_sigp <- tct_sigp[sapply(rownames(tct_sigp), function(x) {
sum(strsplit(x, split = "_")[[1]] %in% fp_tp) > 0
}), ]
dim(fp_sigp <- fp_sigp[, colSums(fp_sigp < 0.01) > 0])
## [1] 4 22
# from which slice
fp_sigp2 <- lapply(list(s3_tct_sigp, s4_tct_sigp), function(x) {
res <- sapply(rownames(fp_sigp), function(y) {
ct <- strsplit(y, split = "_")[[1]]
sapply(colnames(fp_sigp), function(z) {
if (sum(x[, 1] == ct[1] & x[, 2] == ct[2] & x[, 3] == z) >
0)
return(x[x[, 1] == ct[1] & x[, 2] == ct[2] & x[, 3] ==
z, 10]) else return(1)
})
})
return(res)
})
# All from slice 4 which one is the sender
fp_sigp3 <- sapply(rownames(fp_sigp), function(y) {
x <- s4_tct_sigp
ct <- strsplit(y, split = "_")[[1]]
sapply(colnames(fp_sigp), function(z) {
if (sum(x[, 1] == ct[1] & x[, 2] == ct[2] & x[, 3] == z) > 0)
return(x[x[, 1] == ct[1] & x[, 2] == ct[2] & x[, 3] == z, 13]) else return(0)
})
})
# plot cell type and gene expression of FP interactin in S4
plot_gene <- get_id(unique(unlist(lapply(colnames(fp_sigp), function(x) {
strsplit(x, split = "_")[[1]]
}))))
plot_tp <- unique(unlist(lapply(rownames(fp_sigp), function(x) {
strsplit(x, split = "_")[[1]]
})))
# change threshold on expression
dim(plot_data <- norm_mxs[[i]][plot_gene, ])
## [1] 32 3145
plot_min <- 0.5
plot_data[plot_data < plot_min] <- 0
plot_genes_on_tsne(tsne = mx_pos2[[i]][setdiff(rownames(rctd_wt[[i - 2]]),
rm_spot[[i]]), ], mx = plot_data, genes = get_id(c("SHH", "PTCH1",
"HHIP", "NCAM1", "GFRA1")), file_name = "fp_sigp_lr_s4_b", path = paste("../result2022/signal/",
sep = ""), plot_cex = 0.5, is_order = F, data_max = c(10, 5, 5, 5,
3), is_pdf = F)

# put cell types on 1 figure
wt <- rctd_wt[[2]]
plot_tp <- plot_tp[c(1, 2, 3)]
dp_tp <- all_anno_sumt[colnames(wt), "anno1"]
dp_tp[!colnames(wt) %in% plot_tp] <- "others"
plot_wt <- t(apply(wt, 1, function(x) {
tapply(x, dp_tp, sum)
}))
plot_mx <- data.frame(mx_pos2[[4]][rownames(plot_wt), ], plot_wt)
colnames(plot_mx)[1:2] <- c("X", "Y")
plot_mx <- plot_mx[!rownames(plot_mx) %in% rm_spot[[2 + 2]], ]
lab <- c(setdiff(sort(unique(dp_tp)), "others"), "others")
# pdf(paste('../result2022/signal/floor_plate_s4.pdf',sep=''),height=6)
ggplot() + geom_scatterpie(aes(x = X, y = Y, r = 50), data = plot_mx, cols = colnames(plot_mx)[-(1:2)],
color = NA) + coord_equal() + scale_fill_manual(values = c(floor.plate = col6[2],
pMN = col6[3], sclerotome.late = col6[16], others = col6[1]), labels = lab) # 'floor.plate.rhombomere'=col6[4],

# dev.off()